home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
Scene 96
/
Scene 96 International Edition (Zyklop Software) (Disc 2) (1997).iso
/
misc
/
coding
/
pump_src
/
ph_fill.asm
< prev
next >
Wrap
Assembly Source File
|
1995-10-26
|
27KB
|
843 lines
;══════════════════════════════════════════════════════════════════════
; Phong-style fill routine implemented with environment mapping
;══════════════════════════════════════════════════════════════════════
.386
COMMENT %                P1
                         +
                        / \          (P1,P2,P3) are given
                   P4  /---\  P2     P4 is computed
                      /   /
                     /  /
                    / /
                   //
                  + P3
%
; Define ONLY_EVEN (uncomment the next line) to skip every scanline whose
; y coordinate is odd; the scanline loops test bit 0 of current_y.
;ONLY_EVEN=1
; Inclusive clipping limits used by the rejection tests and the scan
; clippers. The left limit is the literal 0 in the code.
MAX_X = 319
MIN_Y = 0
MAX_Y = 199
; Tags stored in scan_filling_dir. The values are arbitrary... but they are
; sure to confuse whoever looks at them :)
TO_THE_RITE = 0EFh
TO_THE_LEFT = 05Ah
;▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒
_DATA SEGMENT PARA PUBLIC USE32 'DATA'
; Base addresses supplied by another module: they are added to the
; destination pointer / used as the texel base just before each fill call.
EXTRN _frame_buffer_adr:DWORD, _env_map_adr:DWORD
PUBLIC _ex1, _ey1, _ex2, _ey2, _ex3, _ey3
PUBLIC _u1, _u2, _u3, _v1, _v2, _v3
PUBLIC _frame_buffer_scan_width
; ─────────────────────────────────────────────────────────────────────
; Input data (written by the caller before calling phong_fill_)
; ─────────────────────────────────────────────────────────────────────
; Misc
; Multiplied by the starting y to obtain the offset of the first scanline.
_frame_buffer_scan_width DW 320
; Projected coordinates of the vertices.
; Each xN / yN label aliases the low word of the dword that follows it,
; so the routine can read the same coordinate as 16 or 32 bits.
x1 LABEL WORD
_ex1 DD ?
y1 LABEL WORD
_ey1 DD ?
x2 LABEL WORD
_ex2 DD ?
y2 LABEL WORD
_ey2 DD ?
x3 LABEL WORD
_ex3 DD ?
y3 LABEL WORD
_ey3 DD ?
; Spherical angles of the three vertices.
; Each uNvN label addresses the (u,v) word pair as one dword so a vertex's
; pair can be swapped with a single 32-bit move.
u1v1 LABEL DWORD
_u1 DW ?
_v1 DW ?
u2v2 LABEL DWORD
_u2 DW ?
_v2 DW ?
u3v3 LABEL DWORD
_u3 DW ?
_v3 DW ?
; ─────────────────────────────────────────────────────────────────────
; Working variables of the routine
; ─────────────────────────────────────────────────────────────────────
; y deltas: dy21 = y2-y1, dy31 = y3-y1 (forced to 1 when zero), dy32 = y3-y2
dy21 DD ?
dy31 DD ?
dy32 DD ?
; Point P4: the point on edge P1-P3 at the height of P2
x4 DW ?
y4 DW ?
u4 DW ?
v4 DW ?
; Rasterising state
current_y DW ?
; Offset of the current scanline (y * scan width), without the base address
current_frame_buffer_pointer DD ?
; Holds TO_THE_RITE or TO_THE_LEFT (only the first byte is accessed)
scan_filling_dir DB ?, ?
; (u,v) at the long edge for the current scanline
long_side_u DW ?
long_side_v DW ?
; x along each edge; the scanline code takes the integer part with SAR 16
long_side_x DD ?
short_side_x DD ?
; Per-scanline increments of the values above
long_side_inc_u DW ?
long_side_inc_v DW ?
long_side_inc_x DD ?
short_side_inc_x DD ?
; Per-pixel increments of (u,v) along a scanline
pixel_inc_u DW ?
pixel_inc_v DW ?
_DATA ENDS
DGROUP GROUP _DATA
;▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒
_TEXT SEGMENT PARA PUBLIC USE32 'CODE'
ASSUME CS:_TEXT, DS:_DATA
PUBLIC phong_fill_
;----------------------------------------------------------------------
; phong_fill_
; Fills the triangle described by the public variables _ex1.._ey3 and
; _u1.._v3. Takes no register arguments; every general register is
; preserved with pushad/popad.
;----------------------------------------------------------------------
phong_fill_ PROC
        pushad
; Trivial rejection in x: leave through L34 when the largest x is
; negative or the smallest x is greater than MAX_X.
        mov     ax,[x1]
        mov     bx,[x2]
        cmp     ax,bx
        jge     short xclip_pair_ordered
        xchg    ax,bx                   ; ax = larger, bx = smaller of x1,x2
xclip_pair_ordered:
        cmp     ax,[x3]
        jge     short xclip_have_max
        mov     ax,[x3]                 ; ax = max(x1,x2,x3)
xclip_have_max:
        test    ax,ax
        js      near ptr L34            ; whole triangle left of x = 0
        cmp     bx,[x3]
        jle     short xclip_have_min
        mov     bx,[x3]                 ; bx = min(x1,x2,x3)
xclip_have_min:
        cmp     bx,MAX_X
        jg      near ptr L34            ; whole triangle right of MAX_X
; Sort the vertices so that y1 <= y2 <= y3 using three conditional
; exchanges: (P1,P2), (P2,P3), (P1,P2). Each exchange swaps the x dword,
; the y dword and the packed (u,v) dword of the two vertices.
L5:     mov     ax,[y1]                 ; first test compares the low words only
        cmp     ax,[y2]
        jle     short L6
        ; exchange P1 <-> P2
        mov     eax,[_ex1]
        mov     edx,[_ex2]
        mov     [_ex1],edx
        mov     [_ex2],eax
        mov     eax,[_ey1]
        mov     edx,[_ey2]
        mov     [_ey1],edx
        mov     [_ey2],eax
        mov     eax,[u1v1]
        mov     edx,[u2v2]
        mov     [u1v1],edx
        mov     [u2v2],eax
L6:     mov     eax,[_ey2]              ; remaining tests compare full dwords
        cmp     eax,[_ey3]
        jle     short L7
        ; exchange P2 <-> P3
        mov     eax,[_ex2]
        mov     edx,[_ex3]
        mov     [_ex2],edx
        mov     [_ex3],eax
        mov     eax,[_ey2]
        mov     edx,[_ey3]
        mov     [_ey2],edx
        mov     [_ey3],eax
        mov     eax,[u2v2]
        mov     edx,[u3v3]
        mov     [u2v2],edx
        mov     [u3v3],eax
L7:     mov     eax,[_ey1]
        cmp     eax,[_ey2]
        jle     short L8
        ; exchange P1 <-> P2 again
        mov     eax,[_ex1]
        mov     edx,[_ex2]
        mov     [_ex1],edx
        mov     [_ex2],eax
        mov     eax,[_ey1]
        mov     edx,[_ey2]
        mov     [_ey1],edx
        mov     [_ey2],eax
        mov     eax,[u1v1]
        mov     edx,[u2v2]
        mov     [u1v1],edx
        mov     [u2v2],eax
L8:
; Trivial rejection in y (vertices are sorted here): leave through L34
; when the top vertex is below MAX_Y or the bottom vertex is above MIN_Y.
        mov     eax,[_ey1]
        cmp     eax,MAX_Y
        jg      near ptr L34
        mov     eax,[_ey3]
        cmp     eax,MIN_Y
        jl      near ptr L34
; y deltas. A zero dy21 (y1 == y2) is handled by the separate path at L35.
        mov     eax,[_ey2]
        sub     eax,[_ey1]
        je      near ptr L35
        mov     [dy21],eax
        mov     eax,[_ey3]
        sub     eax,[_ey1]
        jnz     short dy31_nonzero
        inc     eax                     ; never divide by a zero dy31
dy31_nonzero:
        mov     [dy31],eax
        mov     eax,[_ey3]
        sub     eax,[_ey2]
        mov     [dy32],eax
L13:
; Start (u,v) of the long edge (P1-P3) at vertex 1
        mov     ax,[_u1]
        mov     [long_side_u],ax
        mov     ax,[_v1]
        mov     [long_side_v],ax
; Per-scanline increments of (u,v,x). The u and v differences are taken
; in 16 bits and then sign-extended; the x differences are shifted left
; 16 bits before the division.
        mov     ax,[_u3]
        sub     ax,[_u1]
        cwde
        cdq
        idiv    [dy31]
        mov     [long_side_inc_u],ax
        mov     ax,[_v3]
        sub     ax,[_v1]
        cwde
        cdq
        idiv    [dy31]
        mov     [long_side_inc_v],ax
        mov     eax,[_ex3]
        sub     eax,[_ex1]
        shl     eax,16
        cdq
        idiv    [dy31]
        mov     [long_side_inc_x],eax
        mov     eax,[_ex2]
        sub     eax,[_ex1]
        shl     eax,16
        cdq
        idiv    [dy21]                  ; non-zero: dy21 == 0 went to L35
        mov     [short_side_inc_x],eax
; Both edges start at vertex 1 for the top-to-bottom sweep
        mov     eax,[_ex1]
        shl     eax,16
        mov     [long_side_x],eax
        mov     [short_side_x],eax
        mov     ax,[y1]
        mov     [current_y],ax
; Point 4 = the point of edge P1-P3 at the height of P2:
;   value4 = value1 + (value3 - value1) * dy21 / dy31
        mov     ax,[_u3]
        sub     ax,[_u1]
        cwde
        imul    [dy21]
        idiv    [dy31]
        add     ax,[_u1]
        mov     [u4],ax
        mov     ax,[_v3]
        sub     ax,[_v1]
        cwde
        imul    [dy21]
        idiv    [dy31]
        add     ax,[_v1]
        mov     [v4],ax
        mov     ax,[x3]
        sub     ax,[x1]
        cwde
        imul    [dy21]
        idiv    [dy31]
        add     ax,[x1]
        mov     [x4],ax
; Decide on which side of P2 the long edge lies (ax still holds x4)
        cmp     ax,[x2]
        jl      short L14               ; x4 < x2
        je      short L17               ; x4 == x2
        jmp     L18                     ; x4 > x2
; ─────────────────────────────────────────────────────────────────────
; Exactly one of the three sections below runs. Each one records the
; direction in which scanlines are filled and the (u,v) increments per
; pixel stepped horizontally:
;   pixel_inc = (value2 - value4) / (horizontal distance between P2 and P4)
; The u/v operands are zero-extended before subtracting; a zero distance
; is replaced by 1.
; ─────────────────────────────────────────────────────────────────────
;**** x4 < x2: scanlines are filled from the long edge towards the right
L14:    mov     [scan_filling_dir],TO_THE_RITE
        movsx   ecx,[x2]
        movsx   eax,[x4]
        sub     ecx,eax                 ; ecx = x2 - x4
        jnz     short right_span_ok
        inc     ecx
right_span_ok:
        movzx   eax,[_u2]
        movzx   edx,[u4]
        sub     eax,edx
        cdq
        idiv    ecx
        mov     [pixel_inc_u],ax
        movzx   eax,[_v2]
        movzx   edx,[v4]
        sub     eax,edx
        cdq
        idiv    ecx
        mov     [pixel_inc_v],ax
        jmp     L21
;**** x4 == x2: the short and the long edge overlap
L17:    mov     [scan_filling_dir],TO_THE_RITE
        mov     [pixel_inc_u],1
        mov     [pixel_inc_v],1
        jmp     short L21
;**** x4 > x2: scanlines are filled from the long edge towards the left
L18:    mov     [scan_filling_dir],TO_THE_LEFT
        movsx   ecx,[x4]
        movsx   eax,[x2]
        sub     ecx,eax                 ; ecx = x4 - x2
        jnz     short left_span_ok
        inc     ecx
left_span_ok:
        movzx   eax,[_u2]
        movzx   edx,[u4]
        sub     eax,edx
        cdq
        idiv    ecx
        mov     [pixel_inc_u],ax
        movzx   eax,[_v2]
        movzx   edx,[v4]
        sub     eax,edx
        cdq
        idiv    ecx
        mov     [pixel_inc_v],ax
; ─────────────────────────────────────────────────────────────────────
L21:
; Offset of the first scanline = current_y * _frame_buffer_scan_width
        movsx   eax,[current_y]
        movzx   edx,[_frame_buffer_scan_width]
        imul    eax,edx
        mov     [current_frame_buffer_pointer],eax
; Number of scanlines to walk = y3 - y1, at least 1
        mov     cx,[y3]
        sub     cx,[y1]
        jnz     short L22
        inc     cx
;══════════════════════════════════════════════════════════════════════
; Main loop, one pass per scanline. The remaining count is kept on the
; stack (16-bit push) while the body uses cx for other things.
;══════════════════════════════════════════════════════════════════════
L22:    push    cx
; At the break point (y == y2) the short edge switches from P1-P2 to
; P2-P3, so its x increment per scanline is recomputed.
        mov     ax,[current_y]
        cmp     ax,[y2]
        jne     short L23
        mov     ebx,[_ey3]
        sub     ebx,[_ey2]              ; divisor: y3 - y2
        mov     eax,[_ex3]
        sub     eax,[_ex2]
        shl     eax,16
        cdq
        idiv    ebx
        mov     [short_side_inc_x],eax
L23:
; Body of the main scanline loop: common clipping followed by the
; left-to-right fill path. Every early exit jumps to L33, which advances
; to the next scanline.
IFDEF ONLY_EVEN
; Draw only the even scanlines
mov ax,[current_y]
test ax,1
jnz NEAR PTR L33
ENDIF
; Skip the scanline when it lies above or below
; the clipping area
cmp [current_y],MIN_Y
jl near ptr L33
cmp [current_y],MAX_Y
jg near ptr L33
; Point edi at the first pixel of the scan (frame-buffer offset):
; scanline offset + integer part of long_side_x
mov edi,[current_frame_buffer_pointer]
mov eax,[long_side_x]
sar eax,16
add edi,eax
; If this triangle is filled from right to left, go to L28
cmp [scan_filling_dir],TO_THE_LEFT
je near ptr L28
; Fetch the (u,v) parameters of the first pixel
mov bp,[long_side_u]
mov si,[long_side_v]
; Fetch the x range of the scan: bx = start (long edge), cx = end (short edge)
mov ecx,[short_side_x]
sar ecx,10H
mov ebx,[long_side_x]
sar ebx,10H
; Reject the scan when it lies
; completely outside the x range
cmp bx,MAX_X
jg near ptr L33
cmp cx,0
jl near ptr L33
; Clip the scan on the right
cmp cx,MAX_X
jle short L24
mov cx,MAX_X
L24:
; Clip on the left
cmp bx,0
jge short L26
; dx = number of clipped pixels (-bx); move edi to x = 0 and step (u,v)
; once per clipped pixel
xor edx,edx
sub dx,bx
mov bx,0
add edi,edx
L25: add bp,[pixel_inc_u]
add si,[pixel_inc_v]
dec edx
jne short L25
L26:
; Compute the number of pixels to fill: |cx - bx| + 1, zero-extended to ecx
sub cx,bx
cmp cx,0
jge short L27
neg cx
L27:
inc cx
movzx ecx,cx
COMMENT %
; Skip the scanline when it lies above or below
; the clipping area
cmp [current_y],MIN_Y
jl near ptr L33
cmp [current_y],MAX_Y
jg near ptr L33
%
; Load the registers with the values the inner loop needs:
; ebx = v in the high word, u in the low word;
; edx = pixel_inc_v in the high word, pixel_inc_u in the low word
mov ebx,esi
shl ebx,16
mov bx,bp
mov dx,[pixel_inc_v]
shl edx,16
mov dx,[pixel_inc_u]
; Jump into the middle of the unrolled loop with everything set up:
; eax = 320 - pixel count = number of unrolled iterations to skip.
; edi is moved back by the same amount because the skipped iterations
; also account for that many bytes of store displacement.
mov eax,0140H
sub eax,ecx
sub edi,eax
; Multiply eax by 18 (16x + 2x) to turn the skip count into a code offset
;mov ecx,eax
mov ebp,eax
shl eax,4 ;*16
;shl ecx,2 ;*4
add ebp,ebp ;*2
;add eax,ecx
add eax,ebp
add eax,OFFSET Unrolled1
mov esi,[_env_map_adr]
add edi,[_frame_buffer_adr]
call eax
jmp near ptr L33 ; Close the scanline loop
; Right-to-left fill path of the main scanline loop. On entry edi already
; holds scanline offset + integer part of long_side_x.
; cx = start x (long edge, on the right), bx = end x (short edge, on the left).
L28: mov bp,[long_side_u]
mov si,[long_side_v]
mov ecx,[long_side_x]
sar ecx,16
mov ebx,[short_side_x]
sar ebx,16
; Reject the scan when it lies completely outside the x range
cmp cx,0
jl near ptr L33
cmp bx,MAX_X
jg near ptr L33
; Clip the left end to x = 0
cmp bx,0
jge short L29
mov bx,0
L29:
; Clip the right (starting) end to MAX_X: edx = pixels cut off; move edi
; back to MAX_X and step (u,v) once per clipped pixel
cmp cx,MAX_X
jle short L31
movzx edx,cx
mov ecx,MAX_X
sub edx,MAX_X
sub edi,edx
L30: add bp,[pixel_inc_u]
add si,[pixel_inc_v]
dec edx
jne short L30
L31:
; Number of pixels to fill: |cx - bx| + 1, zero-extended to ecx
sub cx,bx
cmp cx,0
jge short L32
neg cx
L32: inc cx
movzx ecx,cx
; Skip the scanline when it lies above or below the clipping area
; (the same test was already made before reaching L28)
cmp [current_y],MIN_Y
jl near ptr L33
cmp [current_y],MAX_Y
jg near ptr L33
; Load the registers with the values the inner loop needs:
; ebx = v in the high word, u in the low word;
; edx = pixel_inc_v in the high word, pixel_inc_u in the low word
mov ebx,esi
shl ebx,16
mov bx,bp
mov dx,[pixel_inc_v]
shl edx,16
mov dx,[pixel_inc_u]
; eax = 320 - pixel count = number of unrolled iterations to skip.
; Unrolled2 stores at decreasing displacements, so edi is moved forward
; by the skip count here.
mov eax,0140H
sub eax,ecx
add edi,eax
; Multiply eax by 18 (16x + 2x) to turn the skip count into a code offset
;mov ecx,eax
mov ebp,eax
shl eax,4 ;*16
;shl ecx,2 ;*4
add ebp,ebp ;*2
;add eax,ecx
add eax,ebp
add eax,OFFSET Unrolled2
mov esi,[_env_map_adr]
add edi,[_frame_buffer_adr]
call eax
L33:
; Advance the edge state to the next scanline, then loop while the
; scanline count saved on the stack has not reached zero.
        mov     ax,[long_side_inc_u]
        add     [long_side_u],ax
        mov     ax,[long_side_inc_v]
        add     [long_side_v],ax
        mov     eax,[long_side_inc_x]
        add     [long_side_x],eax
        mov     eax,[short_side_inc_x]
        add     [short_side_x],eax
; Step the scanline offset by the configured scan width. The offset is
; seeded as current_y * _frame_buffer_scan_width, so stepping it by a
; literal 320 would drift for any other width. eax is free scratch here.
        movzx   eax,[_frame_buffer_scan_width]
        add     [current_frame_buffer_pointer],eax
        inc     [current_y]
        pop     cx                      ; scanlines left (pushed at L22)
        dec     cx
        jne     near ptr L22            ; Loop!
; Common exit, also used by the trivial-rejection tests
L34:    popad
        ret
; Second case: the triangle has y1 = y2 <= y3, so both edges run from the
; top down to P3. P1 and P2 are first ordered so that x1 <= x2; P1-P3 is
; then used as the long edge and P2-P3 as the short edge.
L35:    mov     ax,[x1]
        cmp     ax,[x2]
        jle     short L36
        ; exchange P1 <-> P2
        mov     eax,[_ex1]
        mov     edx,[_ex2]
        mov     [_ex1],edx
        mov     [_ex2],eax
        mov     eax,[_ey1]
        mov     edx,[_ey2]
        mov     [_ey1],edx
        mov     [_ey2],eax
        mov     eax,[u1v1]
        mov     edx,[u2v2]
        mov     [u1v1],edx
        mov     [u2v2],eax
; Start (u,v) of the long edge at vertex 1
L36:    mov     ax,[_u1]
        mov     [long_side_u],ax
        mov     ax,[_v1]
        mov     [long_side_v],ax
; dy31 = y3 - y1, forced to 1 when zero; kept in ecx as the divisor
        mov     ecx,[_ey3]
        sub     ecx,[_ey1]
        jnz     short @@DY31OK
        inc     ecx
@@DY31OK:
        mov     [dy31],ecx
; Per-scanline increments of (u,v) on the long edge and of x on both edges
        mov     ax,[_u3]
        sub     ax,[_u1]
        movsx   eax,ax
        cdq
        idiv    ecx
        mov     [long_side_inc_u],ax
        mov     ax,[_v3]
        sub     ax,[_v1]
        movsx   eax,ax
        cdq
        idiv    ecx
        mov     [long_side_inc_v],ax
        mov     eax,[_ex3]
        sub     eax,[_ex1]
        shl     eax,16
        cdq
        idiv    ecx
        mov     [long_side_inc_x],eax
        mov     eax,[_ex3]
        sub     eax,[_ex2]
        shl     eax,16
        cdq
        idiv    ecx
        mov     [short_side_inc_x],eax
; Edge start positions: long edge at x1, short edge at x2
        mov     eax,[_ex1]
        shl     eax,16
        mov     [long_side_x],eax
        mov     eax,[_ex2]
        shl     eax,16
        mov     [short_side_x],eax
        mov     ax,[y1]
        mov     [current_y],ax
; Compute pixel_inc_u and pixel_inc_v = (value2 - value1) / (x2 - x1),
; with a zero x distance replaced by 1
        movsx   ecx,[x2]
        movsx   eax,[x1]
        sub     ecx,eax
        jnz     short flat_span_ok
        inc     ecx
flat_span_ok:
        movzx   eax,[_u2]
        movzx   edx,[_u1]
        sub     eax,edx
        cdq
        idiv    ecx
        mov     [pixel_inc_u],ax
        movzx   eax,[_v2]
        movzx   edx,[_v1]
        sub     eax,edx
        cdq
        idiv    ecx
        mov     [pixel_inc_v],ax
; Offset of the first scanline = current_y * _frame_buffer_scan_width
        movsx   eax,[current_y]
        movzx   edx,[_frame_buffer_scan_width]
        imul    eax,edx
        mov     [current_frame_buffer_pointer],eax
; Number of scanlines to walk = y3 - y1, at least 1
        mov     cx,[y3]
        sub     cx,[y1]
        jnz     short L39
        inc     cx
;══════════════════════════════════════════════════════════════════════
; Main loop, one pass per scanline - case y1 == y2.
; Always fills left to right, from long_side_x to short_side_x.
; Every early exit jumps to L44, which advances to the next scanline.
;══════════════════════════════════════════════════════════════════════
L39: push cx
IFDEF ONLY_EVEN
; Draw only the even scanlines
mov ax,[current_y]
test ax,1
jnz NEAR PTR L44
ENDIF
; edi = scanline offset + integer part of long_side_x
mov edi,[current_frame_buffer_pointer]
mov eax,[long_side_x]
sar eax,10H
add edi,eax
; (u,v) of the first pixel
mov bp,[long_side_u]
mov si,[long_side_v]
; bx = start x (long edge), cx = end x (short edge)
mov ecx,[short_side_x]
sar ecx,10H
mov ebx,[long_side_x]
sar ebx,10H
; Reject the scan when it lies completely outside the x range
cmp bx,MAX_X
jg near ptr L44
cmp cx,0
jl near ptr L44
; Clip the right end to MAX_X
cmp cx,MAX_X
jle short L40
mov cx,MAX_X
L40:
; Clip the left end to 0: dx = pixels cut off (-bx); move edi to x = 0
; and step (u,v) once per clipped pixel
cmp bx,0
jge short L42
xor edx,edx
sub dx,bx
xor ebx,ebx
add edi,edx
L41: add bp,[pixel_inc_u]
add si,[pixel_inc_v]
dec dx
jne short L41
; Number of pixels to fill: |cx - bx| + 1, zero-extended to ecx
L42: sub cx,bx
cmp cx,0
jge short L43
neg cx
L43:
inc cx
movzx ecx,cx
; Skip the scanline when it lies above or below the clipping area
cmp [current_y],MIN_Y
jl near ptr L44
cmp [current_y],MAX_Y
jg near ptr L44
; ebx = v in the high word, u in the low word;
; edx = pixel_inc_v in the high word, pixel_inc_u in the low word
mov ebx,esi
shl ebx,16
mov bx,bp
mov dx,[pixel_inc_v]
shl edx,16
mov dx,[pixel_inc_u]
; eax = 320 - pixel count = number of unrolled iterations to skip;
; edi is moved back by the same amount because the skipped iterations
; also account for that many bytes of store displacement
mov eax,0140H
sub eax,ecx
sub edi,eax
; Multiply eax by 18 (16x + 2x) to turn the skip count into a code offset
;mov ecx,eax
mov ebp,eax
shl eax,4
;shl ecx,2
add ebp,ebp
;add eax,ecx
add eax,ebp
add eax,OFFSET Unrolled3
mov esi,[_env_map_adr]
add edi,[_frame_buffer_adr]
call eax
; Advance the edge state to the next scanline of the y1 == y2 case, then
; loop while the scanline count saved on the stack has not reached zero.
L44:    mov     ax,[long_side_inc_u]
        add     [long_side_u],ax
        mov     ax,[long_side_inc_v]
        add     [long_side_v],ax
        mov     eax,[long_side_inc_x]
        add     [long_side_x],eax
        mov     eax,[short_side_inc_x]
        add     [short_side_x],eax
; Step the scanline offset by the configured scan width. The offset is
; seeded as current_y * _frame_buffer_scan_width, so stepping it by a
; literal 320 would drift for any other width. eax is free scratch here.
        movzx   eax,[_frame_buffer_scan_width]
        add     [current_frame_buffer_pointer],eax
        inc     [current_y]
        pop     cx                      ; scanlines left (pushed at L39)
        dec     cx
        jne     near ptr L39
        popad
        ret
; Let's throw people off a bit... (decoy strings, not referenced by the
; code in this file)
DB 'Square root of negative number', 0
DB 'Ray traced too long', 0
; ---------------------------------------------------------------------
; Unrolled pixel loops, entered with an indirect CALL into the middle.
; Register use, as set up by the callers:
;   ebx = v in the high word, u in the low word
;   edx = per-pixel increments packed the same way
;   esi = base of the environment map
;   edi = destination base for the stores
; Each iteration builds a 16-bit index (high byte of v, high byte of u),
; advances ebx by edx, reads one byte from [esi+index] and stores it at
; [edi+I]. The store is emitted by hand (88h 87h + 32-bit displacement)
; so that every iteration has the same length whatever the value of I;
; the callers rely on that fixed size when they multiply the number of
; skipped iterations by 18.
; ---------------------------------------------------------------------
ALIGN 16
; Left-to-right version: I counts up from 0. Shared by the main path
; (Unrolled1) and the y1 == y2 path (Unrolled3).
Unrolled1:
Unrolled3:
; UNROLLED LOOP
I = 0
REPT 320
mov eax,ebx
shr eax,16
mov al,bh
add ebx,edx
mov al,[esi+eax]
;mov [edi+I],al
db 88h, 87h
dd I
I = I + 1
ENDM
; END UNROLLED LOOP
retn
ALIGN 16
; Right-to-left version: I counts down from 0.
Unrolled2:
; UNROLLED LOOP
I = 0
REPT 320
mov eax,ebx
shr eax,16
mov al,bh
add ebx,edx
mov al,[esi+eax]
;mov [edi+I],al
db 88h, 87h
dd I
I = I - 1
ENDM
; END UNROLLED LOOP
retn
COMMENT %
Same as Unrolled1, see above
Unrolled3:
; UNROLLED LOOP
I = 0
REPT 320
mov eax,ebx
shr eax,16
mov al,bh
mov al,[esi+eax]
;mov [edi+I],al
db 88h, 87h
dd I
add ebx,edx
I = I + 1
ENDM
; END UNROLLED LOOP
retn
%
ENDP
_TEXT ENDS
END